i965: Add support for ir_loop counters to the new FS backend.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index 9d022e07fbc42b51782735fbbf82b3888af4f6aa..c1e62062310e6d4223f22bf3352bb2cb6c89eb0d 100644 (file)
@@ -34,6 +34,7 @@ extern "C" {
 #include "program/prog_parameter.h"
 #include "program/prog_print.h"
 #include "program/prog_optimize.h"
+#include "program/sampler.h"
 #include "program/hash_table.h"
 #include "brw_context.h"
 #include "brw_eu.h"
@@ -74,6 +75,7 @@ enum fs_opcodes {
 };
 
 static int using_new_fs = -1;
+static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
 
 struct gl_shader *
 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
@@ -135,13 +137,28 @@ brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);
 
-        brw_do_channel_expressions(shader->ir);
-        brw_do_vector_splitting(shader->ir);
-
         do {
            progress = false;
 
+           brw_do_channel_expressions(shader->ir);
+           brw_do_vector_splitting(shader->ir);
+
+           progress = do_lower_jumps(shader->ir, true, true,
+                                     true, /* main return */
+                                     false, /* continue */
+                                     false /* loops */
+                                     ) || progress;
+
            progress = do_common_optimization(shader->ir, true, 32) || progress;
+
+           progress = lower_noise(shader->ir) || progress;
+           progress =
+              lower_variable_index_to_cond_assign(shader->ir,
+                                                  GL_TRUE, /* input */
+                                                  GL_TRUE, /* output */
+                                                  GL_TRUE, /* temp */
+                                                  GL_TRUE /* uniform */
+                                                  ) || progress;
         } while (progress);
 
         validate_ir_tree(shader->ir);
@@ -303,6 +320,8 @@ public:
       this->conditional_mod = BRW_CONDITIONAL_NONE;
       this->predicated = false;
       this->sampler = 0;
+      this->target = 0;
+      this->eot = false;
       this->shadow_compare = false;
    }
 
@@ -351,8 +370,10 @@ public:
    bool predicated;
    int conditional_mod; /**< BRW_CONDITIONAL_* */
 
-   int mlen; /** SEND message length */
+   int mlen; /**< SEND message length */
    int sampler;
+   int target; /**< MRT target. */
+   bool eot;
    bool shadow_compare;
 
    /** @{
@@ -372,6 +393,7 @@ public:
       this->c = c;
       this->p = &c->func;
       this->brw = p->brw;
+      this->fp = brw->fragment_program;
       this->intel = &brw->intel;
       this->ctx = &intel->ctx;
       this->mem_ctx = talloc_new(NULL);
@@ -390,6 +412,7 @@ public:
       this->current_annotation = NULL;
       this->annotation_string = NULL;
       this->annotation_ir = NULL;
+      this->base_ir = NULL;
    }
    ~fs_visitor()
    {
@@ -428,6 +451,8 @@ public:
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
    void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
    void generate_discard(fs_inst *inst);
+   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
 
    void emit_dummy_fs();
    void emit_interpolation();
@@ -435,8 +460,10 @@ public:
    void emit_fb_writes();
 
    struct brw_reg interp_reg(int location, int channel);
+   int setup_uniform_values(int loc, const glsl_type *type);
 
    struct brw_context *brw;
+   const struct gl_fragment_program *fp;
    struct intel_context *intel;
    GLcontext *ctx;
    struct brw_wm_compile *c;
@@ -481,6 +508,30 @@ fs_reg::fs_reg(enum register_file file, int hw_reg)
    this->type = BRW_REGISTER_TYPE_F;
 }
 
+int /* a BRW_REGISTER_TYPE_* value selected by type->base_type */
+brw_type_for_base_type(const struct glsl_type *type)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_FLOAT:
+      return BRW_REGISTER_TYPE_F;
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_BOOL: /* bools share the register type used for ints */
+      return BRW_REGISTER_TYPE_D;
+   case GLSL_TYPE_UINT:
+      return BRW_REGISTER_TYPE_UD;
+   case GLSL_TYPE_ARRAY:
+   case GLSL_TYPE_STRUCT:
+      /* Placeholder only: this should be overridden with the type of the
+       * member when dereferenced into.  BRW_REGISTER_TYPE_UD is returned
+       * as the value judged most likely to expose a missed override.
+       */
+      return BRW_REGISTER_TYPE_UD;
+   default: /* any other base type is not expected to reach this helper */
+      assert(!"not reached");
+      return BRW_REGISTER_TYPE_F; /* reached only if the assert does not abort */
+   }
+}
+
 /** Automatic reg constructor. */
 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
 {
@@ -490,67 +541,110 @@ fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
    this->reg = v->next_abstract_grf;
    this->reg_offset = 0;
    v->next_abstract_grf += type_size(type);
+   this->type = brw_type_for_base_type(type);
+}
+
+fs_reg * /* whatever variable_ht holds for var; visit(ir_variable) tests it for NULL */
+fs_visitor::variable_storage(ir_variable *var)
+{
+   return (fs_reg *)hash_table_find(this->variable_ht, var); /* keyed by the ir_variable pointer */
+}
+
+/* Appends pointers to a uniform's float components, read from the struct
+ * gl_fragment_program parameter storage starting at slot loc, to
+ * c->prog_data.param.  The values actually get stored there rather than in
+ * some global gl_shader_program uniform store.  Returns the slots consumed.
+ */
+int
+fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+   unsigned int offset = 0; /* parameter slots consumed so far by nested members */
+   float *vec_values;
+
+   if (type->is_matrix()) { /* handled as matrix_columns separate float column vectors */
+      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+                                                       type->vector_elements,
+                                                       1);
+
+      for (unsigned int i = 0; i < type->matrix_columns; i++) {
+        offset += setup_uniform_values(loc + offset, column); /* each column starts where the previous one ended */
+      }
+
+      return offset;
+   }
 
    switch (type->base_type) {
    case GLSL_TYPE_FLOAT:
-      this->type = BRW_REGISTER_TYPE_F;
-      break;
+   case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_BOOL:
-      this->type = BRW_REGISTER_TYPE_D;
-      break;
-   case GLSL_TYPE_UINT:
-      this->type = BRW_REGISTER_TYPE_UD;
-      break;
+      vec_values = fp->Base.Parameters->ParameterValues[loc]; /* the slot holding this scalar/vector */
+      for (unsigned int i = 0; i < type->vector_elements; i++) {
+        c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; /* stores a pointer to the component, not its current value */
+      }
+      return 1; /* a scalar or vector accounts for exactly one slot */
+
+   case GLSL_TYPE_STRUCT:
+      for (unsigned int i = 0; i < type->length; i++) { /* one recursive call per field */
+        offset += setup_uniform_values(loc + offset,
+                                       type->fields.structure[i].type);
+      }
+      return offset;
+
+   case GLSL_TYPE_ARRAY:
+      for (unsigned int i = 0; i < type->length; i++) { /* one recursive call per element */
+        offset += setup_uniform_values(loc + offset, type->fields.array);
+      }
+      return offset;
+
+   case GLSL_TYPE_SAMPLER:
+      /* The sampler takes up a slot, but we don't use any values from it. */
+      return 1;
+
    default:
       assert(!"not reached");
-      this->type =  BRW_REGISTER_TYPE_F;
-      break;
+      return 0; /* no slots accounted for on an unexpected base type */
    }
 }
 
-fs_reg *
-fs_visitor::variable_storage(ir_variable *var)
-{
-   return (fs_reg *)hash_table_find(this->variable_ht, var);
-}
-
 void
 fs_visitor::visit(ir_variable *ir)
 {
    fs_reg *reg = NULL;
 
+   if (variable_storage(ir))
+      return;
+
    if (strcmp(ir->name, "gl_FragColor") == 0) {
       this->frag_color = ir;
    } else if (strcmp(ir->name, "gl_FragData") == 0) {
       this->frag_data = ir;
    } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
       this->frag_depth = ir;
-      assert(!"FINISHME: this hangs currently.");
    }
 
    if (ir->mode == ir_var_in) {
-      reg = &this->interp_attrs[ir->location];
+      if (strcmp(ir->name, "gl_FrontFacing") == 0) {
+        reg = new(this->mem_ctx) fs_reg(this, ir->type);
+        struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+        /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+         * us front face
+         */
+        fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
+                                     *reg,
+                                     fs_reg(r1_6ud),
+                                     fs_reg(1u << 31)));
+        inst->conditional_mod = BRW_CONDITIONAL_L;
+        emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
+      } else {
+        reg = &this->interp_attrs[ir->location];
+      }
    }
 
    if (ir->mode == ir_var_uniform) {
-      const float *vec_values;
       int param_index = c->prog_data.nr_params;
 
-      /* FINISHME: This is wildly incomplete. */
-      assert(ir->type->is_scalar() || ir->type->is_vector() ||
-            ir->type->is_sampler());
-
-      const struct gl_program *fp = &this->brw->fragment_program->Base;
-      /* Our support for uniforms is piggy-backed on the struct
-       * gl_fragment_program, because that's where the values actually
-       * get stored, rather than in some global gl_shader_program uniform
-       * store.
-       */
-      vec_values = fp->Parameters->ParameterValues[ir->location];
-      for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
-        c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
-      }
+      setup_uniform_values(ir->location, ir->type);
 
       reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
    }
@@ -571,7 +665,18 @@ fs_visitor::visit(ir_dereference_variable *ir)
 void
 fs_visitor::visit(ir_dereference_record *ir)
 {
-   assert(!"FINISHME");
+   const glsl_type *struct_type = ir->record->type; /* record type whose field list is walked below */
+
+   ir->record->accept(this); /* this->result is adjusted below, so presumably it now refers to the record's storage */
+
+   unsigned int offset = 0; /* summed type_size() of the fields that precede ir->field */
+   for (unsigned int i = 0; i < struct_type->length; i++) {
+      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+        break; /* reached the field being dereferenced */
+      offset += type_size(struct_type->fields.structure[i].type);
+   }
+   this->result.reg_offset += offset; /* advance from the record's start to the selected field */
+   this->result.type = brw_type_for_base_type(ir->type); /* take the register type from the field's own type */
 }
 
 void
@@ -587,6 +692,7 @@ fs_visitor::visit(ir_dereference_array *ir)
       element_size = ir->type->vector_elements;
    } else {
       element_size = type_size(ir->type);
+      this->result.type = brw_type_for_base_type(ir->type);
    }
 
    if (index) {
@@ -636,7 +742,7 @@ fs_visitor::visit(ir_expression *ir)
       emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
       break;
    case ir_unop_neg:
-      op[0].negate = ~op[0].negate;
+      op[0].negate = !op[0].negate;
       this->result = op[0];
       break;
    case ir_unop_abs:
@@ -859,7 +965,7 @@ fs_visitor::visit(ir_assignment *ir)
    if (ir->condition) {
       /* Get the condition bool into the predicate. */
       ir->condition->accept(this);
-      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0)));
+      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
       inst->conditional_mod = BRW_CONDITIONAL_NZ;
    }
 
@@ -948,6 +1054,12 @@ fs_visitor::visit(ir_texture *ir)
       break;
    }
 
+   inst->sampler =
+      _mesa_get_sampler_uniform_value(ir->sampler,
+                                     ctx->Shader.CurrentProgram,
+                                     &brw->fragment_program->Base);
+   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];
+
    this->result = dst;
 
    if (ir->shadow_comparitor)
@@ -1066,12 +1178,20 @@ fs_visitor::visit(ir_if *ir)
 void
 fs_visitor::visit(ir_loop *ir)
 {
-   assert(!ir->from);
-   assert(!ir->to);
-   assert(!ir->increment);
-   assert(!ir->counter);
+   fs_reg counter = reg_undef;
 
-   emit(fs_inst(BRW_OPCODE_DO));
+   if (ir->counter) {
+      this->base_ir = ir->counter;
+      ir->counter->accept(this);
+      counter = *(variable_storage(ir->counter));
+
+      if (ir->from) {
+        this->base_ir = ir->from;
+        ir->from->accept(this);
+
+        emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
+      }
+   }
 
    /* Start a safety counter.  If the user messed up their loop
     * counting, we don't want to hang the GPU.
@@ -1079,6 +1199,43 @@ fs_visitor::visit(ir_loop *ir)
    fs_reg max_iter = fs_reg(this, glsl_type::int_type);
    emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));
 
+   emit(fs_inst(BRW_OPCODE_DO));
+
+   if (ir->to) {
+      this->base_ir = ir->to;
+      ir->to->accept(this);
+
+      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
+                                  counter, this->result));
+      switch (ir->cmp) {
+      case ir_binop_equal:
+        inst->conditional_mod = BRW_CONDITIONAL_Z;
+        break;
+      case ir_binop_nequal:
+        inst->conditional_mod = BRW_CONDITIONAL_NZ;
+        break;
+      case ir_binop_gequal:
+        inst->conditional_mod = BRW_CONDITIONAL_GE;
+        break;
+      case ir_binop_lequal:
+        inst->conditional_mod = BRW_CONDITIONAL_LE;
+        break;
+      case ir_binop_greater:
+        inst->conditional_mod = BRW_CONDITIONAL_G;
+        break;
+      case ir_binop_less:
+        inst->conditional_mod = BRW_CONDITIONAL_L;
+        break;
+      default:
+        assert(!"not reached: unknown loop condition");
+        this->fail = true;
+        break;
+      }
+
+      inst = emit(fs_inst(BRW_OPCODE_BREAK));
+      inst->predicated = true;
+   }
+
    foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
       ir_instruction *ir = (ir_instruction *)iter.get();
       fs_inst *inst;
@@ -1094,6 +1251,12 @@ fs_visitor::visit(ir_loop *ir)
       inst->predicated = true;
    }
 
+   if (ir->increment) {
+      this->base_ir = ir->increment;
+      ir->increment->accept(this);
+      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
+   }
+
    emit(fs_inst(BRW_OPCODE_WHILE));
 }
 
@@ -1238,7 +1401,7 @@ fs_visitor::emit_interpolation()
    emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
-               fs_reg(brw_vec1_grf(1, 1))));
+               fs_reg(negate(brw_vec1_grf(1, 1)))));
 
    this->current_annotation = "compute pos.w and 1/pos.w";
    /* Compute wpos.  Unlike many other varying inputs, we usually need it
@@ -1259,8 +1422,6 @@ fs_visitor::emit_interpolation()
    this->pixel_w = fs_reg(this, glsl_type::float_type);
    emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));
 
-   /* FINISHME: gl_FrontFacing */
-
    foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
       ir_instruction *ir = (ir_instruction *)iter.get();
       ir_variable *var = ir->as_variable();
@@ -1314,21 +1475,76 @@ fs_visitor::emit_pinterp(int location)
 void
 fs_visitor::emit_fb_writes()
 {
-   this->current_annotation = "FB write";
+   this->current_annotation = "FB write header"; /* annotation while the message registers are set up */
+   int nr = 0; /* next free MRF index; its final value becomes each write's mlen */
 
-   assert(this->frag_color || !"FINISHME: MRT");
-   fs_reg color = *(variable_storage(this->frag_color));
+   /* m0, m1 header */
+   nr += 2;
 
-   for (int i = 0; i < 4; i++) {
-      emit(fs_inst(BRW_OPCODE_MOV,
-                  fs_reg(MRF, 2 + i),
-                  color));
-      color.reg_offset++;
+   if (c->key.aa_dest_stencil_reg) { /* optional: copy that payload register into the next MRF */
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+                  fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
    }
 
-   emit(fs_inst(FS_OPCODE_FB_WRITE,
-               fs_reg(0),
-               fs_reg(0)));
+   /* Reserve space for color. It'll be filled in per MRT below. */
+   int color_mrf = nr; /* first of the four color MRFs */
+   nr += 4;
+
+   if (c->key.source_depth_to_render_target) {
+      if (c->key.computes_depth) {
+        /* Hand over gl_FragDepth. */
+        assert(this->frag_depth);
+        fs_reg depth = *(variable_storage(this->frag_depth));
+
+        emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
+      } else {
+        /* Pass through the payload depth. */
+        emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+                     fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
+      }
+   }
+
+   if (c->key.dest_depth_reg) { /* optional: copy that payload register into the next MRF */
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+                  fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
+   }
+
+   fs_reg color = reg_undef; /* stays undefined when neither output variable was seen */
+   if (this->frag_color) /* gl_FragColor is checked first, gl_FragData second */
+      color = *(variable_storage(this->frag_color));
+   else if (this->frag_data)
+      color = *(variable_storage(this->frag_data));
+
+   for (int target = 0; target < c->key.nr_color_regions; target++) { /* one FB write per color region */
+      this->current_annotation = talloc_asprintf(this->mem_ctx,
+                                                "FB write target %d",
+                                                target);
+      if (this->frag_color || this->frag_data) {
+        for (int i = 0; i < 4; i++) { /* four consecutive source registers into the reserved color MRFs */
+           emit(fs_inst(BRW_OPCODE_MOV,
+                        fs_reg(MRF, color_mrf + i),
+                        color));
+           color.reg_offset++;
+        }
+      }
+
+      if (this->frag_color) /* rewind so every target re-reads the same four registers; */
+        color.reg_offset -= 4; /* with gl_FragData the offset keeps advancing per target */
+
+      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
+                                  reg_undef, reg_undef));
+      inst->target = target;
+      inst->mlen = nr;
+      if (target == c->key.nr_color_regions - 1) /* only the last target's write is flagged eot */
+        inst->eot = true;
+   }
+
+   if (c->key.nr_color_regions == 0) { /* loop above emitted nothing: still emit one write, flagged eot */
+      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
+                                  reg_undef, reg_undef));
+      inst->mlen = nr;
+      inst->eot = true;
+   }
 
    this->current_annotation = NULL;
 }
@@ -1336,8 +1552,7 @@ fs_visitor::emit_fb_writes()
 void
 fs_visitor::generate_fb_write(fs_inst *inst)
 {
-   GLboolean eot = 1; /* FINISHME: MRT */
-   /* FINISHME: AADS */
+   GLboolean eot = inst->eot;
 
    /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
     * move, here's g1.
@@ -1350,15 +1565,13 @@ fs_visitor::generate_fb_write(fs_inst *inst)
           brw_vec8_grf(1, 0));
    brw_pop_insn_state(p);
 
-   int nr = 2 + 4;
-
    brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
-               0, /* FINISHME: MRT target */
-               nr,
+               inst->target,
+               inst->mlen,
                0,
                eot);
 }
@@ -1496,6 +1709,69 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
              BRW_SAMPLER_SIMD_MODE_SIMD8);
 }
 
+
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ *           DDX                     DDY
+ * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
+ *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
+ *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
+ *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
+ *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
+ *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
+ *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
+ *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other.  We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void /* emits one ADD computing dst = src0 - src1; inst is not read here */
+fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+{
+   struct brw_reg src0 = brw_reg(src.file, src.nr, 1, /* third argument 1: presumably the starting element, i.e. ss0.tr in the layout described above */
+                                BRW_REGISTER_TYPE_F,
+                                BRW_VERTICAL_STRIDE_2,
+                                BRW_WIDTH_2,
+                                BRW_HORIZONTAL_STRIDE_0,
+                                BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+   struct brw_reg src1 = brw_reg(src.file, src.nr, 0, /* same region as src0 but with third argument 0 (ss0.tl, same assumption) */
+                                BRW_REGISTER_TYPE_F,
+                                BRW_VERTICAL_STRIDE_2,
+                                BRW_WIDTH_2,
+                                BRW_HORIZONTAL_STRIDE_0,
+                                BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+   brw_ADD(p, dst, src0, negate(src1)); /* subtraction expressed as an add of the negated operand */
+}
+
+void /* emits one ADD computing dst = src0 - src1; inst is not read here */
+fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+{
+   struct brw_reg src0 = brw_reg(src.file, src.nr, 0, /* third argument 0: presumably the starting element, i.e. ss0.tl in the layout described above generate_ddx */
+                                BRW_REGISTER_TYPE_F,
+                                BRW_VERTICAL_STRIDE_4,
+                                BRW_WIDTH_4,
+                                BRW_HORIZONTAL_STRIDE_0,
+                                BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+   struct brw_reg src1 = brw_reg(src.file, src.nr, 2, /* same region as src0 but with third argument 2 (ss0.bl, same assumption) */
+                                BRW_REGISTER_TYPE_F,
+                                BRW_VERTICAL_STRIDE_4,
+                                BRW_WIDTH_4,
+                                BRW_HORIZONTAL_STRIDE_0,
+                                BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+   brw_ADD(p, dst, src0, negate(src1)); /* the simplified (tl - bl) x4 per subspan form the comment above settles for */
+}
+
 void
 fs_visitor::generate_discard(fs_inst *inst)
 {
@@ -1724,6 +2000,7 @@ fs_visitor::generate_code()
       case BRW_OPCODE_IF:
         assert(if_stack_depth < 16);
         if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
+        if_depth_in_loop[loop_stack_depth]++;
         if_stack_depth++;
         break;
       case BRW_OPCODE_ELSE:
@@ -1733,6 +2010,7 @@ fs_visitor::generate_code()
       case BRW_OPCODE_ENDIF:
         if_stack_depth--;
         brw_ENDIF(p , if_stack[if_stack_depth]);
+        if_depth_in_loop[loop_stack_depth]--;
         break;
 
       case BRW_OPCODE_DO:
@@ -1795,6 +2073,12 @@ fs_visitor::generate_code()
       case FS_OPCODE_DISCARD:
         generate_discard(inst);
         break;
+      case FS_OPCODE_DDX:
+        generate_ddx(inst, dst, src[0]);
+        break;
+      case FS_OPCODE_DDY:
+        generate_ddy(inst, dst, src[0]);
+        break;
       case FS_OPCODE_FB_WRITE:
         generate_fb_write(inst);
         break;