i965: Add "discard" support to the new FS backend.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index 57c5f553418c7d868522c8ad7f2685f70dbcc3e2..fd65ab2fa778adfb417cf35e71834924c7f5d3aa 100644 (file)
@@ -26,6 +26,9 @@
  */
 
 extern "C" {
+
+#include <sys/types.h>
+
 #include "main/macros.h"
 #include "main/shaderobj.h"
 #include "program/prog_parameter.h"
@@ -46,7 +49,8 @@ enum register_file {
    GRF = BRW_GENERAL_REGISTER_FILE,
    MRF = BRW_MESSAGE_REGISTER_FILE,
    IMM = BRW_IMMEDIATE_VALUE,
-   FIXED_HW_REG,
+   FIXED_HW_REG, /* a struct brw_reg */
+   UNIFORM, /* prog_data->params[hw_reg] */
    BAD_FILE
 };
 
@@ -63,6 +67,10 @@ enum fs_opcodes {
    FS_OPCODE_DDX,
    FS_OPCODE_DDY,
    FS_OPCODE_LINTERP,
+   FS_OPCODE_TEX,
+   FS_OPCODE_TXB,
+   FS_OPCODE_TXL,
+   FS_OPCODE_DISCARD,
 };
 
 static int using_new_fs = -1;
@@ -73,9 +81,9 @@ brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
    struct brw_shader *shader;
 
    shader = talloc_zero(NULL, struct brw_shader);
-   shader->base.Type = type;
-   shader->base.Name = name;
    if (shader) {
+      shader->base.Type = type;
+      shader->base.Name = name;
       _mesa_init_shader(ctx, &shader->base);
    }
 
@@ -88,6 +96,7 @@ brw_new_shader_program(GLcontext *ctx, GLuint name)
    struct brw_shader_program *prog;
    prog = talloc_zero(NULL, struct brw_shader_program);
    if (prog) {
+      prog->base.Name = name;
       _mesa_init_shader_program(ctx, &prog->base);
    }
    return &prog->base;
@@ -115,10 +124,13 @@ brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
         void *mem_ctx = talloc_new(NULL);
         bool progress;
 
+        if (shader->ir)
+           talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
         do_mat_op_to_vec(shader->ir);
+        do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);
@@ -132,7 +144,9 @@ brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
            progress = do_common_optimization(shader->ir, true) || progress;
         } while (progress);
 
-        reparent_ir(shader->ir, shader);
+        validate_ir_tree(shader->ir);
+
+        reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
       }
    }
@@ -153,16 +167,7 @@ type_size(const struct glsl_type *type)
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
    case GLSL_TYPE_BOOL:
-      if (type->is_matrix()) {
-        /* In case of incoming uniform/varying matrices, match their
-         * allocation behavior.  FINISHME: We could just use
-         * glsl_type->components() for variables and temps within the
-         * shader.
-         */
-        return type->matrix_columns * 4;
-      } else {
-        return type->vector_elements;
-      }
+      return type->components();
    case GLSL_TYPE_ARRAY:
       /* FINISHME: uniform/varying arrays. */
       return type_size(type->fields.array) * type->length;
@@ -292,51 +297,58 @@ public:
    {
       void *node;
 
-      node = talloc_size(ctx, size);
+      node = talloc_zero_size(ctx, size);
       assert(node != NULL);
 
       return node;
    }
 
-   fs_inst()
+   void init()
    {
       this->opcode = BRW_OPCODE_NOP;
       this->saturate = false;
       this->conditional_mod = BRW_CONDITIONAL_NONE;
       this->predicated = false;
+      this->sampler = 0;
+      this->shadow_compare = false;
+   }
+
+   fs_inst()
+   {
+      init();
+   }
+
+   fs_inst(int opcode)
+   {
+      init();
+      this->opcode = opcode;
    }
 
    fs_inst(int opcode, fs_reg dst, fs_reg src0)
    {
+      init();
       this->opcode = opcode;
       this->dst = dst;
       this->src[0] = src0;
-      this->saturate = false;
-      this->conditional_mod = BRW_CONDITIONAL_NONE;
-      this->predicated = false;
    }
 
    fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    {
+      init();
       this->opcode = opcode;
       this->dst = dst;
       this->src[0] = src0;
       this->src[1] = src1;
-      this->saturate = false;
-      this->conditional_mod = BRW_CONDITIONAL_NONE;
-      this->predicated = false;
    }
 
    fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
    {
+      init();
       this->opcode = opcode;
       this->dst = dst;
       this->src[0] = src0;
       this->src[1] = src1;
       this->src[2] = src2;
-      this->saturate = false;
-      this->conditional_mod = BRW_CONDITIONAL_NONE;
-      this->predicated = false;
    }
 
    int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
@@ -345,6 +357,17 @@ public:
    bool saturate;
    bool predicated;
    int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+   int mlen; /** SEND message length */
+   int sampler;
+   bool shadow_compare;
+
+   /** @{
+    * Annotation for the generated IR.  One of the two can be set.
+    */
+   ir_instruction *ir;
+   const char *annotation;
+   /** @} */
 };
 
 class fs_visitor : public ir_visitor
@@ -357,6 +380,7 @@ public:
       this->p = &c->func;
       this->brw = p->brw;
       this->intel = &brw->intel;
+      this->ctx = &intel->ctx;
       this->mem_ctx = talloc_new(NULL);
       this->shader = shader;
       this->fail = false;
@@ -369,6 +393,10 @@ public:
       this->frag_data = NULL;
       this->frag_depth = NULL;
       this->first_non_payload_grf = 0;
+
+      this->current_annotation = NULL;
+      this->annotation_string = NULL;
+      this->annotation_ir = NULL;
    }
    ~fs_visitor()
    {
@@ -397,13 +425,16 @@ public:
    void visit(ir_function_signature *ir);
 
    fs_inst *emit(fs_inst inst);
+   void assign_curb_setup();
    void assign_urb_setup();
    void assign_regs();
    void generate_code();
    void generate_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
+   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
    void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
+   void generate_discard(fs_inst *inst);
 
    void emit_dummy_fs();
    void emit_interpolation();
@@ -414,6 +445,7 @@ public:
 
    struct brw_context *brw;
    struct intel_context *intel;
+   GLcontext *ctx;
    struct brw_wm_compile *c;
    struct brw_compile *p;
    struct brw_shader *shader;
@@ -424,6 +456,13 @@ public:
    ir_variable *frag_color, *frag_data, *frag_depth;
    int first_non_payload_grf;
 
+   /** @{ debug annotation info */
+   const char *current_annotation;
+   ir_instruction *base_ir;
+   const char **annotation_string;
+   ir_instruction **annotation_ir;
+   /** @} */
+
    bool fail;
 
    /* Result of last visit() method. */
@@ -492,21 +531,41 @@ fs_visitor::visit(ir_variable *ir)
 {
    fs_reg *reg = NULL;
 
-   /* FINISHME */
-   assert(ir->mode != ir_var_uniform);
-
    if (strcmp(ir->name, "gl_FragColor") == 0) {
       this->frag_color = ir;
    } else if (strcmp(ir->name, "gl_FragData") == 0) {
       this->frag_data = ir;
    } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
       this->frag_depth = ir;
+      assert(!"FINISHME: this hangs currently.");
    }
 
    if (ir->mode == ir_var_in) {
       reg = &this->interp_attrs[ir->location];
    }
 
+   if (ir->mode == ir_var_uniform) {
+      const float *vec_values;
+      int param_index = c->prog_data.nr_params;
+
+      /* FINISHME: This is wildly incomplete. */
+      assert(ir->type->is_scalar() || ir->type->is_vector() ||
+            ir->type->is_sampler());
+
+      const struct gl_program *fp = &this->brw->fragment_program->Base;
+      /* Our support for uniforms is piggy-backed on the struct
+       * gl_fragment_program, because that's where the values actually
+       * get stored, rather than in some global gl_shader_program uniform
+       * store.
+       */
+      vec_values = fp->Parameters->ParameterValues[ir->location];
+      for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+        c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
+      }
+
+      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
+   }
+
    if (!reg)
       reg = new(this->mem_ctx) fs_reg(this, ir->type);
 
@@ -529,7 +588,26 @@ fs_visitor::visit(ir_dereference_record *ir)
+/**
+ * Visits an array dereference with a constant index.
+ *
+ * The array operand is visited first, then this->result.reg_offset is
+ * advanced by index * type_size(element type).  A non-constant index is not
+ * supported yet and marks the compile as failed.
+ */
 void
 fs_visitor::visit(ir_dereference_array *ir)
 {
-   assert(!"FINISHME");
+   ir_constant *index;
+   int element_size;
+
+   ir->array->accept(this);
+   index = ir->array_index->as_constant();
+
+   /* ir->type is the type of a single element, so its full size is the
+    * stride.  For a matrix column that is the column vector; for an array of
+    * matrices it must be the whole matrix, not just one column's worth of
+    * components.
+    */
+   element_size = type_size(ir->type);
+
+   if (index) {
+      assert(this->result.file == UNIFORM ||
+             (this->result.file == GRF &&
+              this->result.reg != 0));
+      this->result.reg_offset += index->value.i[0] * element_size;
+   } else {
+      assert(!"FINISHME: non-constant matrix column");
+      /* Don't silently read element 0 when asserts are compiled out. */
+      this->fail = true;
+   }
 }
 
 void
@@ -569,24 +647,27 @@ fs_visitor::visit(ir_expression *ir)
       emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
       break;
    case ir_unop_neg:
-      this->result = op[0];
       op[0].negate = ~op[0].negate;
+      this->result = op[0];
       break;
    case ir_unop_abs:
-      this->result = op[0];
       op[0].abs = true;
+      this->result = op[0];
       break;
    case ir_unop_sign:
       temp = fs_reg(this, ir->type);
 
-      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
+      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
+
+      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
       inst->conditional_mod = BRW_CONDITIONAL_G;
+      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
+      inst->predicated = true;
 
-      inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
+      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
       inst->conditional_mod = BRW_CONDITIONAL_L;
-
-      temp.negate = true;
-      emit(fs_inst(BRW_OPCODE_ADD, this->result, this->result, temp));
+      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
+      inst->predicated = true;
 
       break;
    case ir_unop_rcp:
@@ -637,26 +718,32 @@ fs_visitor::visit(ir_expression *ir)
    case ir_binop_less:
       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
       inst->conditional_mod = BRW_CONDITIONAL_L;
+      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
       break;
    case ir_binop_greater:
       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
       inst->conditional_mod = BRW_CONDITIONAL_G;
+      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
       break;
    case ir_binop_lequal:
       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
       inst->conditional_mod = BRW_CONDITIONAL_LE;
+      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
       break;
    case ir_binop_gequal:
       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
       inst->conditional_mod = BRW_CONDITIONAL_GE;
+      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
       break;
    case ir_binop_equal:
       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
       inst->conditional_mod = BRW_CONDITIONAL_Z;
+      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
       break;
    case ir_binop_nequal:
       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
       inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
       break;
 
    case ir_binop_logic_xor:
@@ -782,12 +869,11 @@ fs_visitor::visit(ir_assignment *ir)
    }
 
    for (i = 0; i < type_size(ir->lhs->type); i++) {
-      if (i < 4 && !(write_mask & (1 << i)))
-        continue;
-
-      inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
-      if (ir->condition)
-        inst->predicated = true;
+      if (i >= 4 || (write_mask & (1 << i))) {
+        inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
+        if (ir->condition)
+           inst->predicated = true;
+      }
       l.reg_offset++;
       r.reg_offset++;
    }
@@ -796,7 +882,82 @@ fs_visitor::visit(ir_assignment *ir)
+/**
+ * Emits the FS IR for a texture sample.
+ *
+ * The (optionally projected) coordinate is moved into MRFs starting at
+ * base_mrf, followed by the shadow comparitor and the bias/LOD argument when
+ * present.  A FS_OPCODE_TEX/TXB/TXL instruction is then emitted with a fresh
+ * vec4 destination, which becomes this->result.  Unsupported texture ops
+ * mark the compile as failed.
+ */
 void
 fs_visitor::visit(ir_texture *ir)
 {
-   assert(!"FINISHME");
+   int base_mrf = 2;
+   fs_inst *inst = NULL;
+   unsigned int mlen = 0;
+
+   ir->coordinate->accept(this);
+   fs_reg coordinate = this->result;
+
+   if (ir->projector) {
+      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);
+
+      ir->projector->accept(this);
+      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));
+
+      /* Multiply each coordinate channel by 1/projector. */
+      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
+      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+         emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
+         coordinate.reg_offset++;
+         proj_coordinate.reg_offset++;
+      }
+      /* Rewind to the first channel before using it as the coordinate. */
+      proj_coordinate.reg_offset = 0;
+
+      coordinate = proj_coordinate;
+   }
+
+   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
+      coordinate.reg_offset++;
+   }
+
+   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
+   if (intel->gen < 5)
+      mlen = 3;
+
+   if (ir->shadow_comparitor) {
+      /* For shadow comparisons, we have to supply u,v,r. */
+      mlen = 3;
+
+      ir->shadow_comparitor->accept(this);
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+      mlen++;
+   }
+
+   /* Do we ever want to handle writemasking on texture samples?  Is it
+    * performance relevant?
+    */
+   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+
+   switch (ir->op) {
+   case ir_tex:
+      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
+      break;
+   case ir_txb:
+      ir->lod_info.bias->accept(this);
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+      mlen++;
+
+      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
+      break;
+   case ir_txl:
+      ir->lod_info.lod->accept(this);
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+      mlen++;
+
+      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
+      break;
+   case ir_txd:
+   case ir_txf:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+   }
+
+   this->result = dst;
+
+   /* No sample instruction was emitted (unsupported op with asserts compiled
+    * out): flag the failure instead of dereferencing a NULL inst below.
+    */
+   if (inst == NULL) {
+      this->fail = true;
+      return;
+   }
+
+   if (ir->shadow_comparitor)
+      inst->shadow_compare = true;
+   inst->mlen = mlen;
 }
 
 void
@@ -836,7 +997,9 @@ fs_visitor::visit(ir_swizzle *ir)
+/**
+ * Emits FS_OPCODE_DISCARD for an unconditional discard.
+ *
+ * FINISHME: conditional discards are not handled yet; they mark the compile
+ * as failed rather than being emitted as an unconditional discard.
+ */
 void
 fs_visitor::visit(ir_discard *ir)
 {
-   assert(!"FINISHME");
+   assert(ir->condition == NULL); /* FINISHME */
+   if (ir->condition != NULL) {
+      /* With asserts compiled out, don't discard every pixel by mistake. */
+      this->fail = true;
+      return;
+   }
+
+   emit(fs_inst(FS_OPCODE_DISCARD));
 }
 
 void
@@ -869,7 +1032,40 @@ fs_visitor::visit(ir_constant *ir)
+/**
+ * Emits the FS IR for an if statement: the condition is evaluated and moved
+ * to the null register with an NZ conditional mod, followed by a predicated
+ * IF, the then block, an optional ELSE plus else block, and an ENDIF.
+ */
 void
 fs_visitor::visit(ir_if *ir)
 {
-   assert(!"FINISHME");
+   /* Annotate with the condition only: pointing base_ir at the whole if
+    * statement would print the then and else blocks along with it.
+    */
+   this->base_ir = ir->condition;
+
+   /* Generate the condition into the condition code. */
+   ir->condition->accept(this);
+   fs_inst *cond_mov = emit(fs_inst(BRW_OPCODE_MOV,
+                                    fs_reg(brw_null_reg()),
+                                    this->result));
+   cond_mov->conditional_mod = BRW_CONDITIONAL_NZ;
+
+   fs_inst *if_inst = emit(fs_inst(BRW_OPCODE_IF));
+   if_inst->predicated = true;
+
+   foreach_iter(exec_list_iterator, then_iter, ir->then_instructions) {
+      ir_instruction *then_ir = (ir_instruction *)then_iter.get();
+
+      this->base_ir = then_ir;
+      then_ir->accept(this);
+   }
+
+   if (!ir->else_instructions.is_empty()) {
+      emit(fs_inst(BRW_OPCODE_ELSE));
+
+      foreach_iter(exec_list_iterator, else_iter, ir->else_instructions) {
+         ir_instruction *else_ir = (ir_instruction *)else_iter.get();
+
+         this->base_ir = else_ir;
+         else_ir->accept(this);
+      }
+   }
+
+   emit(fs_inst(BRW_OPCODE_ENDIF));
 }
 
 void
@@ -912,6 +1108,7 @@ fs_visitor::visit(ir_function *ir)
 
       foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
+        this->base_ir = ir;
 
         ir->accept(this);
       }
@@ -931,6 +1128,9 @@ fs_visitor::emit(fs_inst inst)
    fs_inst *list_inst = new(mem_ctx) fs_inst;
    *list_inst = inst;
 
+   list_inst->annotation = this->current_annotation;
+   list_inst->ir = this->base_ir;
+
    this->instructions.push_tail(list_inst);
 
    return list_inst;
@@ -984,7 +1184,7 @@ fs_visitor::emit_interpolation()
     */
    fs_reg src_reg = reg_undef;
 
-   /* Compute the pixel centers. */
+   this->current_annotation = "compute pixel centers";
    this->pixel_x = fs_reg(this, glsl_type::uint_type);
    this->pixel_y = fs_reg(this, glsl_type::uint_type);
    emit(fs_inst(BRW_OPCODE_ADD,
@@ -996,7 +1196,7 @@ fs_visitor::emit_interpolation()
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));
 
-   /* Compute the offsets from vertex 0 to the pixel centers */
+   this->current_annotation = "compute pixel deltas from v0";
    this->delta_x = fs_reg(this, glsl_type::float_type);
    this->delta_y = fs_reg(this, glsl_type::float_type);
    emit(fs_inst(BRW_OPCODE_ADD,
@@ -1008,6 +1208,7 @@ fs_visitor::emit_interpolation()
                this->pixel_y,
                fs_reg(brw_vec1_grf(1, 1))));
 
+   this->current_annotation = "compute pos.w and 1/pos.w";
    /* Compute wpos.  Unlike many other varying inputs, we usually need it
     * to produce 1/w, and the varying variable wouldn't show up.
     */
@@ -1042,8 +1243,14 @@ fs_visitor::emit_interpolation()
       if (var->location == 0)
         continue;
 
+      this->current_annotation = talloc_asprintf(this->mem_ctx,
+                                                "interpolate %s "
+                                                "(FRAG_ATTRIB[%d])",
+                                                var->name,
+                                                var->location);
       emit_pinterp(var->location);
    }
+   this->current_annotation = NULL;
 }
 
 void
@@ -1075,6 +1282,8 @@ fs_visitor::emit_pinterp(int location)
 void
 fs_visitor::emit_fb_writes()
 {
+   this->current_annotation = "FB write";
+
    assert(this->frag_color || !"FINISHME: MRT");
    fs_reg color = *(variable_storage(this->frag_color));
 
@@ -1088,6 +1297,8 @@ fs_visitor::emit_fb_writes()
    emit(fs_inst(FS_OPCODE_FB_WRITE,
                fs_reg(0),
                fs_reg(0)));
+
+   this->current_annotation = NULL;
 }
 
 void
@@ -1175,7 +1386,6 @@ fs_visitor::generate_math(fs_inst *inst,
       break;
    }
 
-   brw_MOV(p, brw_message_reg(2), src[0]);
    if (inst->opcode == FS_OPCODE_POW) {
       brw_MOV(p, brw_message_reg(3), src[1]);
    }
@@ -1184,11 +1394,87 @@ fs_visitor::generate_math(fs_inst *inst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
-           2, brw_null_reg(),
+           2, src[0],
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
 }
 
+/**
+ * Generates the sampler SEND for an FS_OPCODE_TEX/TXB/TXL instruction.
+ *
+ * The message type is picked from the opcode and inst->shadow_compare; the
+ * message length is inst->mlen plus one register for the g0 header.
+ *
+ * FINISHME: generate_code() also routes FS_OPCODE_TXL here, but neither
+ * switch below selects a message type for it, so it hits the assert.
+ *
+ * \param inst  texture instruction being lowered.
+ * \param dst   destination register passed to brw_SAMPLE.
+ * \param src   first payload MRF; the header goes in the register before it.
+ */
+void
+fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+{
+   int msg_type = -1;
+   int rlen = 4;
+
+   if (intel->gen == 5) {
+      switch (inst->opcode) {
+      case FS_OPCODE_TEX:
+         if (inst->shadow_compare) {
+            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
+         } else {
+            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
+         }
+         break;
+      case FS_OPCODE_TXB:
+         if (inst->shadow_compare) {
+            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
+         } else {
+            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
+         }
+         break;
+      default:
+         break;
+      }
+   } else {
+      switch (inst->opcode) {
+      case FS_OPCODE_TEX:
+         /* Note that G45 and older determines shadow compare and dispatch width
+          * from message length for most messages.
+          */
+         if (inst->shadow_compare) {
+            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+         } else {
+            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+         }
+         /* Without this break the TXB case below would overwrite msg_type
+          * (and possibly rlen) for plain texture samples.
+          */
+         break;
+      case FS_OPCODE_TXB:
+         if (inst->shadow_compare) {
+            assert(!"FINISHME: shadow compare with bias.");
+            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+         } else {
+            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+            rlen = 8;
+         }
+         break;
+      default:
+         break;
+      }
+   }
+   assert(msg_type != -1);
+
+   /* g0 header. */
+   src.nr--;
+
+   brw_SAMPLE(p,
+              retype(dst, BRW_REGISTER_TYPE_UW),
+              src.nr,
+              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+              SURF_INDEX_TEXTURE(inst->sampler),
+              inst->sampler,
+              WRITEMASK_XYZW,
+              msg_type,
+              rlen,
+              inst->mlen + 1,
+              0,
+              1,
+              BRW_SAMPLER_SIMD_MODE_SIMD8);
+}
+
+/**
+ * Generates native code for FS_OPCODE_DISCARD.
+ *
+ * With mask control disabled, the complement of mask register 1 (IMASK) is
+ * written to c->emit_mask_reg and then ANDed into a single UW element at
+ * g0.0.  The instruction state is pushed before and popped after, so the
+ * mask-control change does not leak into later instructions.
+ *
+ * NOTE(review): presumably this clears the discarded channels' bits in the
+ * pixel mask carried in g0 -- confirm against the hardware documentation.
+ *
+ * \param inst  the discard instruction; not otherwise used here.
+ */
+void
+fs_visitor::generate_discard(fs_inst *inst)
+{
+   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
+   brw_AND(p, g0, c->emit_mask_reg, g0);
+   brw_pop_insn_state(p);
+}
+
 static void
 trivial_assign_reg(int header_size, fs_reg *reg)
 {
@@ -1198,10 +1484,34 @@ trivial_assign_reg(int header_size, fs_reg *reg)
    }
 }
 
+/**
+ * Places the push constants directly after the payload registers and
+ * rewrites every UNIFORM-file source operand into the fixed GRF location
+ * holding that parameter (eight parameters per register).
+ */
+void
+fs_visitor::assign_curb_setup()
+{
+   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
+   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
+
+   /* Map the offsets in the UNIFORM file to fixed HW regs. */
+   foreach_iter(exec_list_iterator, it, this->instructions) {
+      fs_inst *cur = (fs_inst *)it.get();
+
+      for (unsigned int s = 0; s < 3; s++) {
+         if (cur->src[s].file != UNIFORM)
+            continue;
+
+         /* Index of this operand within prog_data's parameter list. */
+         int param = cur->src[s].hw_reg + cur->src[s].reg_offset;
+
+         cur->src[s].file = FIXED_HW_REG;
+         cur->src[s].fixed_hw_reg =
+            brw_vec1_grf(c->prog_data.first_curbe_grf + param / 8,
+                         param % 8);
+      }
+   }
+}
+
 void
 fs_visitor::assign_urb_setup()
 {
-   int urb_start = c->key.nr_payload_regs; /* FINISHME: push constants */
+   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
    int interp_reg_nr[FRAG_ATTRIB_MAX];
 
    c->prog_data.urb_read_length = 0;
@@ -1258,7 +1568,7 @@ fs_visitor::assign_regs()
       last_grf = MAX2(last_grf, inst->src[1].hw_reg);
    }
 
-   this->grf_used = last_grf;
+   this->grf_used = last_grf + 1;
 }
 
 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
@@ -1279,10 +1589,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
         brw_reg = brw_imm_f(reg->imm.f);
         break;
       case BRW_REGISTER_TYPE_D:
-        brw_reg = brw_imm_f(reg->imm.i);
+        brw_reg = brw_imm_d(reg->imm.i);
         break;
       case BRW_REGISTER_TYPE_UD:
-        brw_reg = brw_imm_f(reg->imm.u);
+        brw_reg = brw_imm_ud(reg->imm.u);
         break;
       default:
         assert(!"not reached");
@@ -1295,6 +1605,11 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
    case BAD_FILE:
       /* Probably unused. */
       brw_reg = brw_null_reg();
+      break;
+   case UNIFORM:
+      assert(!"not reached");
+      brw_reg = brw_null_reg();
+      break;
    }
    if (reg->abs)
       brw_reg = brw_abs(brw_reg);
@@ -1307,6 +1622,12 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
 void
 fs_visitor::generate_code()
 {
+   unsigned int annotation_len = 0;
+   int last_native_inst = 0;
+   struct brw_instruction *if_stack[16];
+   int if_stack_depth = 0;
+
+   memset(&if_stack, 0, sizeof(if_stack));
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       fs_inst *inst = (fs_inst *)iter.get();
       struct brw_reg src[3], dst;
@@ -1329,6 +1650,47 @@ fs_visitor::generate_code()
       case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;
+
+      case BRW_OPCODE_FRC:
+        brw_FRC(p, dst, src[0]);
+        break;
+      case BRW_OPCODE_RNDD:
+        brw_RNDD(p, dst, src[0]);
+        break;
+      case BRW_OPCODE_RNDZ:
+        brw_RNDZ(p, dst, src[0]);
+        break;
+
+      case BRW_OPCODE_AND:
+        brw_AND(p, dst, src[0], src[1]);
+        break;
+      case BRW_OPCODE_OR:
+        brw_OR(p, dst, src[0], src[1]);
+        break;
+      case BRW_OPCODE_XOR:
+        brw_XOR(p, dst, src[0], src[1]);
+        break;
+
+      case BRW_OPCODE_CMP:
+        brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+        break;
+      case BRW_OPCODE_SEL:
+        brw_SEL(p, dst, src[0], src[1]);
+        break;
+
+      case BRW_OPCODE_IF:
+        assert(if_stack_depth < 16);
+        if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
+        if_stack_depth++;
+        break;
+      case BRW_OPCODE_ELSE:
+        if_stack[if_stack_depth - 1] =
+           brw_ELSE(p, if_stack[if_stack_depth - 1]);
+        break;
+      case BRW_OPCODE_ENDIF:
+        if_stack_depth--;
+        brw_ENDIF(p , if_stack[if_stack_depth]);
+        break;
       case FS_OPCODE_RCP:
       case FS_OPCODE_RSQ:
       case FS_OPCODE_SQRT:
@@ -1342,12 +1704,47 @@ fs_visitor::generate_code()
       case FS_OPCODE_LINTERP:
         generate_linterp(inst, dst, src);
         break;
+      case FS_OPCODE_TEX:
+      case FS_OPCODE_TXB:
+      case FS_OPCODE_TXL:
+        generate_tex(inst, dst, src[0]);
+        break;
+      case FS_OPCODE_DISCARD:
+        generate_discard(inst);
+        break;
       case FS_OPCODE_FB_WRITE:
         generate_fb_write(inst);
         break;
       default:
-        assert(!"not reached");
+        if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+           _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
+                         brw_opcodes[inst->opcode].name);
+        } else {
+           _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
+        }
+        this->fail = true;
+      }
+
+      if (annotation_len < p->nr_insn) {
+        annotation_len *= 2;
+        if (annotation_len < 16)
+           annotation_len = 16;
+
+        this->annotation_string = talloc_realloc(this->mem_ctx,
+                                                 annotation_string,
+                                                 const char *,
+                                                 annotation_len);
+        this->annotation_ir = talloc_realloc(this->mem_ctx,
+                                             annotation_ir,
+                                             ir_instruction *,
+                                             annotation_len);
+      }
+
+      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+        this->annotation_string[i] = inst->annotation;
+        this->annotation_ir[i] = inst->ir;
       }
+      last_native_inst = p->nr_insn;
    }
 }
 
@@ -1401,12 +1798,17 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
-      visit_exec_list(shader->ir, &v);
+      foreach_iter(exec_list_iterator, iter, *shader->ir) {
+        ir_instruction *ir = (ir_instruction *)iter.get();
+        v.base_ir = ir;
+        ir->accept(&v);
+      }
 
       if (v.fail)
         return GL_FALSE;
 
       v.emit_fb_writes();
+      v.assign_curb_setup();
       v.assign_urb_setup();
       v.assign_regs();
    }
@@ -1414,15 +1816,29 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
    v.generate_code();
 
    if (INTEL_DEBUG & DEBUG_WM) {
+      const char *last_annotation_string = NULL;
+      ir_instruction *last_annotation_ir = NULL;
+
       printf("Native code for fragment shader %d:\n", prog->Name);
-      for (unsigned int i = 0; i < p->nr_insn; i++)
+      for (unsigned int i = 0; i < p->nr_insn; i++) {
+        if (last_annotation_ir != v.annotation_ir[i]) {
+           last_annotation_ir = v.annotation_ir[i];
+           if (last_annotation_ir) {
+              printf("   ");
+              last_annotation_ir->print();
+              printf("\n");
+           }
+        }
+        if (last_annotation_string != v.annotation_string[i]) {
+           last_annotation_string = v.annotation_string[i];
+           if (last_annotation_string)
+              printf("   %s\n", last_annotation_string);
+        }
         brw_disasm(stdout, &p->store[i], intel->gen);
+      }
       printf("\n");
    }
 
-   c->prog_data.nr_params = 0; /* FINISHME */
-   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
-   c->prog_data.curb_read_length = 0; /* FINISHME */
    c->prog_data.total_grf = v.grf_used;
    c->prog_data.total_scratch = 0;