i965: Fix missing "break;" in i2b/f2b, and missing AND of CMP result.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 0353bb54e7236e919b691e9470fa5f1005a15aad..5e5d17504b76dcdb3a8b670087c0e4c8a9a3c1ee 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -91,8 +91,13 @@ GLboolean
  brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
  {
     struct intel_context *intel = intel_context(ctx);
-   if (using_new_fs == -1)
-      using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+
+   if (using_new_fs == -1) {
+      if (intel->gen >= 6)
+        using_new_fs = 1;
+      else
+        using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+   }
  
     for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
        struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
@@ -530,6 +535,18 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
        assert(!"not reached: bad math opcode");
        return NULL;
     }
+
+   /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
+    * might be able to do better by doing execsize = 1 math and then
+    * expanding that result out, but we would need to be careful with
+    * masking.
+    */
+   if (intel->gen >= 6 && src.file == UNIFORM) {
+      fs_reg expanded = fs_reg(this, glsl_type::float_type);
+      emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
+      src = expanded;
+   }
+
     fs_inst *inst = emit(fs_inst(opcode, dst, src));
  
     if (intel->gen < 6) {
@@ -549,6 +566,19 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
     assert(opcode == FS_OPCODE_POW);
  
     if (intel->gen >= 6) {
+      /* Can't do hstride == 0 args to gen6 math, so expand it out. */
+      if (src0.file == UNIFORM) {
+        fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
+        src0 = expanded;
+      }
+
+      if (src1.file == UNIFORM) {
+        fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
+        src1 = expanded;
+      }
+
        inst = emit(fs_inst(opcode, dst, src0, src1));
     } else {
        emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
@@ -824,8 +854,6 @@ fs_visitor::visit(ir_expression *ir)
     case ir_unop_i2f:
     case ir_unop_b2f:
     case ir_unop_b2i:
-      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
-      break;
     case ir_unop_f2i:
        emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
        break;
@@ -833,6 +861,9 @@ fs_visitor::visit(ir_expression *ir)
     case ir_unop_i2b:
        inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
        inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
+                         this->result, fs_reg(1)));
+      break;
  
     case ir_unop_trunc:
        emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
@@ -1554,19 +1585,28 @@ fs_visitor::emit_interpolation_setup_gen6()
  
     /* If the pixel centers end up used, the setup is the same as for gen4. */
     this->current_annotation = "compute pixel centers";
-   this->pixel_x = fs_reg(this, glsl_type::uint_type);
-   this->pixel_y = fs_reg(this, glsl_type::uint_type);
-   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
-   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
+   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
+   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
+   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
+   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
     emit(fs_inst(BRW_OPCODE_ADD,
-               this->pixel_x,
+               int_pixel_x,
                 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                 fs_reg(brw_imm_v(0x10101010))));
     emit(fs_inst(BRW_OPCODE_ADD,
-               this->pixel_y,
+               int_pixel_y,
                 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                 fs_reg(brw_imm_v(0x11001100))));
  
+   /* As of gen6, we can no longer mix float and int sources.  We have
+    * to turn the integer pixel centers into floats for their actual
+    * use.
+    */
+   this->pixel_x = fs_reg(this, glsl_type::float_type);
+   this->pixel_y = fs_reg(this, glsl_type::float_type);
+   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
+   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
+
     this->current_annotation = "compute 1/pos.w";
     this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
     this->pixel_w = fs_reg(this, glsl_type::float_type);