i965/fs: Move brw_wm_compile::fp to fs_visitor.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
index eecfc92eb5ba71922f6bf3349e1566a283f27cb5..4a1700b8fdb0d186bf84888bc1867b2cabe757ed 100644 (file)
@@ -34,13 +34,15 @@ extern "C" {
 } /* extern "C" */
 
 #include "brw_fs.h"
-#include "../glsl/ir_print_visitor.h"
+#include "brw_cfg.h"
+#include "glsl/ir_print_visitor.h"
 
 void
 fs_visitor::generate_fb_write(fs_inst *inst)
 {
-   GLboolean eot = inst->eot;
+   bool eot = inst->eot;
    struct brw_reg implied_header;
+   uint32_t msg_control;
 
    /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
     * move, here's g1.
@@ -57,6 +59,18 @@ fs_visitor::generate_fb_write(fs_inst *inst)
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
+         if (inst->target > 0 &&
+            c->key.nr_color_regions > 1 &&
+            c->key.sample_alpha_to_coverage) {
+            /* Set "Source0 Alpha Present to RenderTarget" bit in message
+             * header.
+             */
+            brw_OR(p,
+                  vec1(retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD)),
+                  vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+                  brw_imm_ud(0x1 << 11));
+         }
+
         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
@@ -77,12 +91,20 @@ fs_visitor::generate_fb_write(fs_inst *inst)
       implied_header = brw_null_reg();
    }
 
+   if (this->dual_src_output.file != BAD_FILE)
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
+   else if (dispatch_width == 16)
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+   else
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+
    brw_pop_insn_state(p);
 
    brw_fb_WRITE(p,
-               c->dispatch_width,
+               dispatch_width,
                inst->base_mrf,
                implied_header,
+               msg_control,
                inst->target,
                inst->mlen,
                0,
@@ -111,7 +133,7 @@ fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
       deltas = brw_imm_v(0x11001100);
    }
 
-   if (c->dispatch_width == 16) {
+   if (dispatch_width == 16) {
       dst = vec16(dst);
    }
 
@@ -143,100 +165,100 @@ fs_visitor::generate_linterp(fs_inst *inst,
 }
 
 void
-fs_visitor::generate_math(fs_inst *inst,
-                         struct brw_reg dst, struct brw_reg *src)
+fs_visitor::generate_math1_gen7(fs_inst *inst,
+                               struct brw_reg dst,
+                               struct brw_reg src0)
 {
-   int op;
+   assert(inst->mlen == 0); /* One-operand math, used by generate_code()
+                             * when intel->gen >= 7.  A single brw_math()
+                             * is emitted; unlike generate_math1_gen6()
+                             * there is no per-half split for 16-wide
+                             * dispatch.  The 0 passed after the function
+                             * code sits in the argument slot where
+                             * generate_math_gen4() passes inst->base_mrf.
+                             */
+   brw_math(p, dst,
+           brw_math_function(inst->opcode),
+           0, src0,
+           BRW_MATH_DATA_VECTOR,
+           BRW_MATH_PRECISION_FULL);
+}
 
-   switch (inst->opcode) {
-   case FS_OPCODE_RCP:
-      op = BRW_MATH_FUNCTION_INV;
-      break;
-   case FS_OPCODE_RSQ:
-      op = BRW_MATH_FUNCTION_RSQ;
-      break;
-   case FS_OPCODE_SQRT:
-      op = BRW_MATH_FUNCTION_SQRT;
-      break;
-   case FS_OPCODE_EXP2:
-      op = BRW_MATH_FUNCTION_EXP;
-      break;
-   case FS_OPCODE_LOG2:
-      op = BRW_MATH_FUNCTION_LOG;
-      break;
-   case FS_OPCODE_POW:
-      op = BRW_MATH_FUNCTION_POW;
-      break;
-   case FS_OPCODE_SIN:
-      op = BRW_MATH_FUNCTION_SIN;
-      break;
-   case FS_OPCODE_COS:
-      op = BRW_MATH_FUNCTION_COS;
-      break;
-   default:
-      assert(!"not reached: unknown math function");
-      op = 0;
-      break;
+void
+fs_visitor::generate_math2_gen7(fs_inst *inst,
+                               struct brw_reg dst,
+                               struct brw_reg src0,
+                               struct brw_reg src1)
+{
+   assert(inst->mlen == 0); /* Two-operand math, used by generate_code()
+                             * for SHADER_OPCODE_POW, INT_QUOTIENT and
+                             * INT_REMAINDER when intel->gen >= 7.  One
+                             * brw_math2() is emitted with both sources
+                             * passed directly; unlike
+                             * generate_math2_gen6() no per-half split is
+                             * done for 16-wide dispatch.
+                             */
+   brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
+}
+/**
+ * Emits a one-operand math instruction for gen6 (generate_code() selects
+ * this variant when intel->gen == 6).
+ *
+ * The math is issued with compression control set to BRW_COMPRESSION_NONE.
+ * For 16-wide dispatch a second brw_math() is emitted on sechalf() of dst
+ * and src0 under BRW_COMPRESSION_2NDHALF, after which compression control
+ * is left at BRW_COMPRESSION_COMPRESSED.
+ *
+ * inst->mlen must be 0; the argument slot where generate_math_gen4()
+ * passes inst->base_mrf is given 0 here.
+ */
+void
+fs_visitor::generate_math1_gen6(fs_inst *inst,
+                               struct brw_reg dst,
+                               struct brw_reg src0)
+{
+   int op = brw_math_function(inst->opcode);
+
+   assert(inst->mlen == 0);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math(p, dst,
+           op,
+           0, src0,
+           BRW_MATH_DATA_VECTOR,
+           BRW_MATH_PRECISION_FULL);
+
+   if (dispatch_width == 16) {
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math(p, sechalf(dst),
+              op,
+              0, sechalf(src0),
+              BRW_MATH_DATA_VECTOR,
+              BRW_MATH_PRECISION_FULL);
+      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
    }
+}
 
-   if (intel->gen >= 6) {
-      assert(inst->mlen == 0);
+void
+fs_visitor::generate_math2_gen6(fs_inst *inst,
+                               struct brw_reg dst,
+                               struct brw_reg src0,
+                               struct brw_reg src1)
+{
+   int op = brw_math_function(inst->opcode);
 
-      if (inst->opcode == FS_OPCODE_POW) {
-        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-        brw_math2(p, dst, op, src[0], src[1]);
+   assert(inst->mlen == 0); /* Two-operand math for gen6: generate_code()
+                             * routes SHADER_OPCODE_POW, INT_QUOTIENT and
+                             * INT_REMAINDER here when intel->gen == 6.
+                             */
 
-        if (c->dispatch_width == 16) {
-           brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
-           brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
-           brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-        }
-      } else {
-        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-        brw_math(p, dst,
-                 op,
-                 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
-                 BRW_MATH_SATURATE_NONE,
-                 0, src[0],
-                 BRW_MATH_DATA_VECTOR,
-                 BRW_MATH_PRECISION_FULL);
-
-        if (c->dispatch_width == 16) {
-           brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
-           brw_math(p, sechalf(dst),
-                    op,
-                    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
-                    BRW_MATH_SATURATE_NONE,
-                    0, sechalf(src[0]),
-                    BRW_MATH_DATA_VECTOR,
-                    BRW_MATH_PRECISION_FULL);
-           brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-        }
-      }
-   } else /* gen <= 5 */{
-      assert(inst->mlen >= 1);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* uncompressed math on dst/src0/src1 as given */
+   brw_math2(p, dst, op, src0, src1);
 
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_math(p, dst,
+   if (dispatch_width == 16) { /* 16-wide: repeat on sechalf() of every operand */
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
+      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); /* leave compression control at COMPRESSED afterwards */
+   }
+}
+/**
+ * Emits a math instruction for generations below 6 (generate_code() falls
+ * back to this variant for both the one-operand and the two-operand math
+ * opcodes, always passing src[0]).
+ *
+ * Requires inst->mlen >= 1 and hands inst->base_mrf to brw_math().  The
+ * math is issued with compression control set to BRW_COMPRESSION_NONE.  For
+ * 16-wide dispatch a second brw_math() is emitted on sechalf() of dst and
+ * src using inst->base_mrf + 1 under BRW_COMPRESSION_2NDHALF, after which
+ * compression control is left at BRW_COMPRESSION_COMPRESSED.
+ *
+ * NOTE(review): the second operand of two-operand math is not passed in;
+ * presumably it has already been placed in the message registers by the
+ * caller -- confirm against the visitor code.
+ */
+void
+fs_visitor::generate_math_gen4(fs_inst *inst,
+                              struct brw_reg dst,
+                              struct brw_reg src)
+{
+   int op = brw_math_function(inst->opcode);
+
+   assert(inst->mlen >= 1);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math(p, dst,
+           op,
+           inst->base_mrf, src,
+           BRW_MATH_DATA_VECTOR,
+           BRW_MATH_PRECISION_FULL);
+
+   if (dispatch_width == 16) {
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math(p, sechalf(dst),
               op,
-              inst->saturate ? BRW_MATH_SATURATE_SATURATE :
-              BRW_MATH_SATURATE_NONE,
-              inst->base_mrf, src[0],
+              inst->base_mrf + 1, sechalf(src),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
 
-      if (c->dispatch_width == 16) {
-        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
-        brw_math(p, sechalf(dst),
-                 op,
-                 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
-                 BRW_MATH_SATURATE_NONE,
-                 inst->base_mrf + 1, sechalf(src[0]),
-                 BRW_MATH_DATA_VECTOR,
-                 BRW_MATH_PRECISION_FULL);
-
-        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-      }
+      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
    }
 }
 
@@ -246,13 +268,26 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
    int msg_type = -1;
    int rlen = 4;
    uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+   uint32_t return_format;
 
-   if (c->dispatch_width == 16)
+   switch (dst.type) {
+   case BRW_REGISTER_TYPE_D:
+      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
+      break;
+   case BRW_REGISTER_TYPE_UD:
+      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
+      break;
+   default:
+      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+      break;
+   }
+
+   if (dispatch_width == 16)
       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
 
    if (intel->gen >= 5) {
       switch (inst->opcode) {
-      case FS_OPCODE_TEX:
+      case SHADER_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         } else {
@@ -266,25 +301,34 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
         }
         break;
-      case FS_OPCODE_TXL:
+      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
-      case FS_OPCODE_TXD:
+      case SHADER_OPCODE_TXS:
+        msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+        break;
+      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         break;
+      case SHADER_OPCODE_TXF:
+        msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
+        break;
+      default:
+        assert(!"not reached");
+        break;
       }
    } else {
       switch (inst->opcode) {
-      case FS_OPCODE_TEX:
+      case SHADER_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
          */
-        assert(c->dispatch_width == 8);
+        assert(dispatch_width == 8);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
@@ -302,7 +346,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
-      case FS_OPCODE_TXL:
+      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
@@ -312,11 +356,24 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
-      case FS_OPCODE_TXD:
+      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         assert(inst->mlen == 7 || inst->mlen == 10);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
         break;
+      case SHADER_OPCODE_TXF:
+        assert(inst->mlen == 9);
+        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
+        simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+        break;
+      case SHADER_OPCODE_TXS:
+        assert(inst->mlen == 3);
+        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
+        simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+        break;
+      default:
+        assert(!"not reached");
+        break;
       }
    }
    assert(msg_type != -1);
@@ -326,6 +383,27 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
       dst = vec16(dst);
    }
 
+   /* Load the message header if present.  If there's a texture offset,
+    * we need to set it up explicitly and load the offset bitfield.
+    * Otherwise, we can use an implied move from g0 to the first message reg.
+    */
+   if (inst->texture_offset) {
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      /* Explicitly set up the message header by copying g0 to the MRF. */
+      brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
+                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+      /* Then set the offset bits in DWord 2. */
+      brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+                                     inst->base_mrf, 2), BRW_REGISTER_TYPE_UD),
+                 brw_imm_ud(inst->texture_offset));
+      brw_pop_insn_state(p);
+   } else if (inst->header_present) {
+      /* Set up an implied move from g0 to the MRF. */
+      src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
+   }
+
    brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
@@ -336,9 +414,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
              msg_type,
              rlen,
              inst->mlen,
-             0,
              inst->header_present,
-             simd_mode);
+             simd_mode,
+             return_format);
 }
 
 
@@ -386,8 +464,13 @@ fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
    brw_ADD(p, dst, src0, negate(src1));
 }
 
+/* The negate_value boolean is used to negate the derivative computation for
+ * FBOs, since they place the origin at the upper left instead of the lower
+ * left.
+ */
 void
-fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+                         bool negate_value)
 {
    struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
@@ -401,7 +484,10 @@ fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
-   brw_ADD(p, dst, src0, negate(src1));
+   if (negate_value)
+      brw_ADD(p, dst, src1, negate(src0));
+   else
+      brw_ADD(p, dst, src0, negate(src1));
 }
 
 void
@@ -499,7 +585,9 @@ fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
 }
 
 void
-fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
+fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
+                                       struct brw_reg index,
+                                       struct brw_reg offset)
 {
    assert(inst->mlen != 0);
 
@@ -516,8 +604,16 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
    if (intel->gen == 4 && !intel->is_g4x)
       brw_MOV(p, brw_null_reg(), dst);
 
+   assert(index.file == BRW_IMMEDIATE_VALUE &&
+         index.type == BRW_REGISTER_TYPE_UD);
+   uint32_t surf_index = index.dw1.ud;
+
+   assert(offset.file == BRW_IMMEDIATE_VALUE &&
+         offset.type == BRW_REGISTER_TYPE_UD);
+   uint32_t read_offset = offset.dw1.ud;
+
    brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
-                       inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
+                       read_offset, surf_index);
 
    if (intel->gen == 4 && !intel->is_g4x) {
       /* gen4 errata: destination from a send can't be used as a
@@ -528,6 +624,44 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
    }
 }
 
+
+/**
+ * Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred
+ * into the flags register (f0.0).
+ *
+ * The source is brw_vec1_grf(1, 7) retyped to UW and the destination is
+ * brw_flag_reg().  The MOV is emitted with mask control set to
+ * BRW_MASK_DISABLE, bracketed by brw_push_insn_state() and
+ * brw_pop_insn_state() (presumably so that the mask-control change does not
+ * carry over to later instructions -- confirm against brw_eu).
+ *
+ * Used only on Gen6 and above; this is enforced by an assert on intel->gen.
+ */
+void
+fs_visitor::generate_mov_dispatch_to_flags()
+{
+   struct brw_reg f0 = brw_flag_reg();
+   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+
+   assert (intel->gen >= 6);
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, f0, g1);
+   brw_pop_insn_state(p);
+}
+
+/**
+ * Translates the file of an fs_reg into the matching BRW_* register-file
+ * value: ARF, GRF, MRF and IMM map to BRW_ARCHITECTURE_REGISTER_FILE,
+ * BRW_GENERAL_REGISTER_FILE, BRW_MESSAGE_REGISTER_FILE and
+ * BRW_IMMEDIATE_VALUE respectively.
+ *
+ * Any other file value trips the "not reached" assert; the trailing return
+ * of BRW_GENERAL_REGISTER_FILE is reached only if that assert does not
+ * abort (e.g. in builds without assertions).
+ */
+static uint32_t brw_file_from_reg(fs_reg *reg)
+{
+   switch (reg->file) {
+   case ARF:
+      return BRW_ARCHITECTURE_REGISTER_FILE;
+   case GRF:
+      return BRW_GENERAL_REGISTER_FILE;
+   case MRF:
+      return BRW_MESSAGE_REGISTER_FILE;
+   case IMM:
+      return BRW_IMMEDIATE_VALUE;
+   default:
+      assert(!"not reached");
+      return BRW_GENERAL_REGISTER_FILE;
+   }
+}
+
 static struct brw_reg
 brw_reg_from_fs_reg(fs_reg *reg)
 {
@@ -538,11 +672,9 @@ brw_reg_from_fs_reg(fs_reg *reg)
    case ARF:
    case MRF:
       if (reg->smear == -1) {
-        brw_reg = brw_vec8_reg(reg->file,
-                               reg->hw_reg, 0);
+        brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
       } else {
-        brw_reg = brw_vec1_reg(reg->file,
-                               reg->hw_reg, reg->smear);
+        brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
       }
       brw_reg = retype(brw_reg, reg->type);
       if (reg->sechalf)
@@ -592,33 +724,59 @@ brw_reg_from_fs_reg(fs_reg *reg)
 void
 fs_visitor::generate_code()
 {
-   int last_native_inst = p->nr_insn;
+   int last_native_insn_offset = p->next_insn_offset;
    const char *last_annotation_string = NULL;
-   ir_instruction *last_annotation_ir = NULL;
-
-   int loop_stack_array_size = 16;
-   int loop_stack_depth = 0;
-   brw_instruction **loop_stack =
-      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
-   int *if_depth_in_loop =
-      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
-
+   const void *last_annotation_ir = NULL;
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
-      printf("Native code for fragment shader %d (%d-wide dispatch):\n",
-            prog->Name, c->dispatch_width);
+      if (shader) {
+         printf("Native code for fragment shader %d (%d-wide dispatch):\n",
+                prog->Name, dispatch_width);
+      } else {
+         printf("Native code for fragment program %d (%d-wide dispatch):\n",
+                fp->Base.Id, dispatch_width);
+      }
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   cfg_t *cfg = NULL;
+   if (unlikely(INTEL_DEBUG & DEBUG_WM))
+      cfg = new(mem_ctx) cfg_t(this);
+
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
       struct brw_reg src[3], dst;
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+        foreach_list(node, &cfg->block_list) {
+           bblock_link *link = (bblock_link *)node;
+           bblock_t *block = link->block;
+
+           if (block->start == inst) {
+              printf("   START B%d", block->block_num);
+              foreach_list(predecessor_node, &block->parents) {
+                 bblock_link *predecessor_link =
+                    (bblock_link *)predecessor_node;
+                 bblock_t *predecessor_block = predecessor_link->block;
+                 printf(" <-B%d", predecessor_block->block_num);
+              }
+              printf("\n");
+           }
+        }
+
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf("   ");
-              last_annotation_ir->print();
+               if (shader)
+                  ((ir_instruction *)inst->ir)->print();
+               else {
+                  const prog_instruction *fpi;
+                  fpi = (const prog_instruction *)inst->ir;
+                  printf("%d: ", (int)(fpi - fp->Base.Instructions));
+                  _mesa_fprint_instruction_opt(stdout,
+                                               fpi,
+                                               0, PROG_PRINT_DEBUG, NULL);
+               }
               printf("\n");
            }
         }
@@ -631,15 +789,25 @@ fs_visitor::generate_code()
 
       for (unsigned int i = 0; i < 3; i++) {
         src[i] = brw_reg_from_fs_reg(&inst->src[i]);
+
+        /* The accumulator result appears to get used for the
+         * conditional modifier generation.  When negating a UD
+         * value, there is a 33rd bit generated for the sign in the
+         * accumulator value, so now you can't check, for example,
+         * equality with a 32-bit value.  See piglit fs-op-neg-uvec4.
+         */
+        assert(!inst->conditional_mod ||
+               inst->src[i].type != BRW_REGISTER_TYPE_UD ||
+               !inst->src[i].negate);
       }
       dst = brw_reg_from_fs_reg(&inst->dst);
 
       brw_set_conditionalmod(p, inst->conditional_mod);
-      brw_set_predicate_control(p, inst->predicated);
+      brw_set_predicate_control(p, inst->predicate);
       brw_set_predicate_inverse(p, inst->predicate_inverse);
       brw_set_saturate(p, inst->saturate);
 
-      if (inst->force_uncompressed || c->dispatch_width == 8) {
+      if (inst->force_uncompressed || dispatch_width == 8) {
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
       } else if (inst->force_sechalf) {
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
@@ -657,6 +825,25 @@ fs_visitor::generate_code()
       case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;
+      case BRW_OPCODE_MACH:
+        brw_set_acc_write_control(p, 1);
+        brw_MACH(p, dst, src[0], src[1]);
+        brw_set_acc_write_control(p, 0);
+        break;
+
+      case BRW_OPCODE_MAD:
+        brw_set_access_mode(p, BRW_ALIGN_16);
+        if (dispatch_width == 16) {
+           brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+           brw_MAD(p, dst, src[0], src[1], src[2]);
+           brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+           brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
+           brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+        } else {
+           brw_MAD(p, dst, src[0], src[1], src[2]);
+        }
+        brw_set_access_mode(p, BRW_ALIGN_1);
+        break;
 
       case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
@@ -706,9 +893,8 @@ fs_visitor::generate_code()
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
-           brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
+           brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
         }
-        if_depth_in_loop[loop_stack_depth]++;
         break;
 
       case BRW_OPCODE_ELSE:
@@ -716,70 +902,54 @@ fs_visitor::generate_code()
         break;
       case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
-        if_depth_in_loop[loop_stack_depth]--;
         break;
 
       case BRW_OPCODE_DO:
-        loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
-        if (loop_stack_array_size <= loop_stack_depth) {
-           loop_stack_array_size *= 2;
-           loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
-                                 loop_stack_array_size);
-           if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
-                                       loop_stack_array_size);
-        }
-        if_depth_in_loop[loop_stack_depth] = 0;
+        brw_DO(p, BRW_EXECUTE_8);
         break;
 
       case BRW_OPCODE_BREAK:
-        brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+        brw_BREAK(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
       case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
-           gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+           gen6_CONT(p);
         else
-           brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+           brw_CONT(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
 
-      case BRW_OPCODE_WHILE: {
-        struct brw_instruction *inst0, *inst1;
-        GLuint br = 1;
-
-        if (intel->gen >= 5)
-           br = 2;
+      case BRW_OPCODE_WHILE:
+        brw_WHILE(p);
+        break;
 
-        assert(loop_stack_depth > 0);
-        loop_stack_depth--;
-        inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
-        if (intel->gen < 6) {
-           /* patch all the BREAK/CONT instructions from last BGNLOOP */
-           while (inst0 > loop_stack[loop_stack_depth]) {
-              inst0--;
-              if (inst0->header.opcode == BRW_OPCODE_BREAK &&
-                  inst0->bits3.if_else.jump_count == 0) {
-                 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
-           }
-              else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
-                       inst0->bits3.if_else.jump_count == 0) {
-                 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
-              }
-           }
+      case SHADER_OPCODE_RCP:
+      case SHADER_OPCODE_RSQ:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_EXP2:
+      case SHADER_OPCODE_LOG2:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
+        if (intel->gen >= 7) {
+           generate_math1_gen7(inst, dst, src[0]);
+        } else if (intel->gen == 6) {
+           generate_math1_gen6(inst, dst, src[0]);
+        } else {
+           generate_math_gen4(inst, dst, src[0]);
         }
-      }
         break;
-
-      case FS_OPCODE_RCP:
-      case FS_OPCODE_RSQ:
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_EXP2:
-      case FS_OPCODE_LOG2:
-      case FS_OPCODE_POW:
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
-        generate_math(inst, dst, src);
+      case SHADER_OPCODE_INT_QUOTIENT:
+      case SHADER_OPCODE_INT_REMAINDER:
+      case SHADER_OPCODE_POW:
+        if (intel->gen >= 7) {
+           generate_math2_gen7(inst, dst, src[0], src[1]);
+        } else if (intel->gen == 6) {
+           generate_math2_gen6(inst, dst, src[0], src[1]);
+        } else {
+           generate_math_gen4(inst, dst, src[0]);
+        }
         break;
       case FS_OPCODE_PIXEL_X:
         generate_pixel_xy(dst, true);
@@ -793,10 +963,12 @@ fs_visitor::generate_code()
       case FS_OPCODE_LINTERP:
         generate_linterp(inst, dst, src);
         break;
-      case FS_OPCODE_TEX:
+      case SHADER_OPCODE_TEX:
       case FS_OPCODE_TXB:
-      case FS_OPCODE_TXD:
-      case FS_OPCODE_TXL:
+      case SHADER_OPCODE_TXD:
+      case SHADER_OPCODE_TXF:
+      case SHADER_OPCODE_TXL:
+      case SHADER_OPCODE_TXS:
         generate_tex(inst, dst, src[0]);
         break;
       case FS_OPCODE_DISCARD:
@@ -806,7 +978,11 @@ fs_visitor::generate_code()
         generate_ddx(inst, dst, src[0]);
         break;
       case FS_OPCODE_DDY:
-        generate_ddy(inst, dst, src[0]);
+         /* Make sure fp->UsesDFdy flag got set (otherwise there's no
+          * guarantee that c->key.render_to_fbo is set).
+          */
+         assert(fp->UsesDFdy);
+        generate_ddy(inst, dst, src[0], c->key.render_to_fbo);
         break;
 
       case FS_OPCODE_SPILL:
@@ -818,16 +994,21 @@ fs_visitor::generate_code()
         break;
 
       case FS_OPCODE_PULL_CONSTANT_LOAD:
-        generate_pull_constant_load(inst, dst);
+        generate_pull_constant_load(inst, dst, src[0], src[1]);
         break;
 
       case FS_OPCODE_FB_WRITE:
         generate_fb_write(inst);
         break;
+
+      case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
+         generate_mov_dispatch_to_flags();
+         break;
+
       default:
-        if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+        if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
            _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
-                         brw_opcodes[inst->opcode].name);
+                         opcode_descs[inst->opcode].name);
         } else {
            _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
         }
@@ -835,28 +1016,33 @@ fs_visitor::generate_code()
       }
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
-        for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
-           if (0) {
-              printf("0x%08x 0x%08x 0x%08x 0x%08x ",
-                     ((uint32_t *)&p->store[i])[3],
-                     ((uint32_t *)&p->store[i])[2],
-                     ((uint32_t *)&p->store[i])[1],
-                     ((uint32_t *)&p->store[i])[0]);
+        brw_dump_compile(p, stdout,
+                         last_native_insn_offset, p->next_insn_offset);
+
+        foreach_list(node, &cfg->block_list) {
+           bblock_link *link = (bblock_link *)node;
+           bblock_t *block = link->block;
+
+           if (block->end == inst) {
+              printf("   END B%d", block->block_num);
+              foreach_list(successor_node, &block->children) {
+                 bblock_link *successor_link =
+                    (bblock_link *)successor_node;
+                 bblock_t *successor_block = successor_link->block;
+                 printf(" ->B%d", successor_block->block_num);
+              }
+              printf("\n");
            }
-           brw_disasm(stdout, &p->store[i], intel->gen);
         }
       }
 
-      last_native_inst = p->nr_insn;
+      last_native_insn_offset = p->next_insn_offset;
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
       printf("\n");
    }
 
-   ralloc_free(loop_stack);
-   ralloc_free(if_depth_in_loop);
-
    brw_set_uip_jip(p);
 
    /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
@@ -865,15 +1051,6 @@ fs_visitor::generate_code()
     * case you're doing that.
     */
    if (0) {
-      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
-        for (unsigned int i = 0; i < p->nr_insn; i++) {
-           printf("0x%08x 0x%08x 0x%08x 0x%08x ",
-                  ((uint32_t *)&p->store[i])[3],
-                  ((uint32_t *)&p->store[i])[2],
-                  ((uint32_t *)&p->store[i])[1],
-                  ((uint32_t *)&p->store[i])[0]);
-           brw_disasm(stdout, &p->store[i], intel->gen);
-        }
-      }
+      brw_dump_compile(p, stdout, 0, p->next_insn_offset);
    }
 }