i965: Add support for rescaling GL_TEXTURE_RECTANGLE coords to new FS.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 0f1f4fa26ef88d4902673e95b1a9fcaf92eba2d2..7119f971f82ad7d1f471ec4a18c6d9117e04ed53 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -52,7 +52,7 @@ static int using_new_fs = -1;
  static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
  
  struct gl_shader *
-brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
+brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
  {
     struct brw_shader *shader;
  
@@ -67,7 +67,7 @@ brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
  }
  
  struct gl_shader_program *
-brw_new_shader_program(GLcontext *ctx, GLuint name)
+brw_new_shader_program(struct gl_context *ctx, GLuint name)
  {
     struct brw_shader_program *prog;
     prog = talloc_zero(NULL, struct brw_shader_program);
@@ -79,7 +79,7 @@ brw_new_shader_program(GLcontext *ctx, GLuint name)
  }
  
  GLboolean
-brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
+brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
  {
     if (!_mesa_ir_compile_shader(ctx, shader))
        return GL_FALSE;
@@ -88,11 +88,16 @@ brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
  }
  
  GLboolean
-brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
+brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
  {
     struct intel_context *intel = intel_context(ctx);
-   if (using_new_fs == -1)
-      using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+
+   if (using_new_fs == -1) {
+      if (intel->gen >= 6)
+        using_new_fs = 1;
+      else
+        using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+   }
  
     for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
        struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
@@ -530,6 +535,18 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
        assert(!"not reached: bad math opcode");
        return NULL;
     }
+
+   /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
+    * might be able to do better by doing execsize = 1 math and then
+    * expanding that result out, but we would need to be careful with
+    * masking.
+    */
+   if (intel->gen >= 6 && src.file == UNIFORM) {
+      fs_reg expanded = fs_reg(this, glsl_type::float_type);
+      emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
+      src = expanded;
+   }
+
     fs_inst *inst = emit(fs_inst(opcode, dst, src));
  
     if (intel->gen < 6) {
@@ -549,6 +566,19 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
     assert(opcode == FS_OPCODE_POW);
  
     if (intel->gen >= 6) {
+      /* Can't do hstride == 0 args to gen6 math, so expand it out. */
+      if (src0.file == UNIFORM) {
+        fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
+        src0 = expanded;
+      }
+
+      if (src1.file == UNIFORM) {
+        fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
+        src1 = expanded;
+      }
+
        inst = emit(fs_inst(opcode, dst, src0, src1));
     } else {
        emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
@@ -824,8 +854,6 @@ fs_visitor::visit(ir_expression *ir)
     case ir_unop_i2f:
     case ir_unop_b2f:
     case ir_unop_b2i:
-      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
-      break;
     case ir_unop_f2i:
        emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
        break;
@@ -833,6 +861,9 @@ fs_visitor::visit(ir_expression *ir)
     case ir_unop_i2b:
        inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
        inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
+                         this->result, fs_reg(1)));
+      break;
  
     case ir_unop_trunc:
        emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
@@ -1146,6 +1177,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
  void
  fs_visitor::visit(ir_texture *ir)
  {
+   int sampler;
     fs_inst *inst = NULL;
  
     ir->coordinate->accept(this);
@@ -1154,6 +1186,44 @@ fs_visitor::visit(ir_texture *ir)
     /* Should be lowered by do_lower_texture_projection */
     assert(!ir->projector);
  
+   sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+                                            ctx->Shader.CurrentProgram,
+                                            &brw->fragment_program->Base);
+   sampler = c->fp->program.Base.SamplerUnits[sampler];
+
+   /* The 965 requires the EU to do the normalization of GL rectangle
+    * texture coordinates.  We use the program parameter state
+    * tracking to get the scaling factor.
+    */
+   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+      struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+      int tokens[STATE_LENGTH] = {
+        STATE_INTERNAL,
+        STATE_TEXRECT_SCALE,
+        sampler,
+        0,
+        0
+      };
+
+      fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
+      fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
+      GLuint index = _mesa_add_state_reference(params,
+                                              (gl_state_index *)tokens);
+      float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
+
+      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
+      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];
+
+      fs_reg dst = fs_reg(this, ir->coordinate->type);
+      fs_reg src = coordinate;
+      coordinate = dst;
+
+      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x));
+      dst.reg_offset++;
+      src.reg_offset++;
+      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y));
+   }
+
     /* Writemasking doesn't eliminate channels on SIMD8 texture
      * samples, so don't worry about them.
      */
@@ -1165,11 +1235,7 @@ fs_visitor::visit(ir_texture *ir)
        inst = emit_texture_gen5(ir, dst, coordinate);
     }
  
-   inst->sampler =
-      _mesa_get_sampler_uniform_value(ir->sampler,
-                                     ctx->Shader.CurrentProgram,
-                                     &brw->fragment_program->Base);
-   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];
+   inst->sampler = sampler;
  
     this->result = dst;
  
@@ -1554,19 +1620,28 @@ fs_visitor::emit_interpolation_setup_gen6()
  
     /* If the pixel centers end up used, the setup is the same as for gen4. */
     this->current_annotation = "compute pixel centers";
-   this->pixel_x = fs_reg(this, glsl_type::uint_type);
-   this->pixel_y = fs_reg(this, glsl_type::uint_type);
-   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
-   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
+   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
+   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
+   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
+   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
     emit(fs_inst(BRW_OPCODE_ADD,
-               this->pixel_x,
+               int_pixel_x,
                 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                 fs_reg(brw_imm_v(0x10101010))));
     emit(fs_inst(BRW_OPCODE_ADD,
-               this->pixel_y,
+               int_pixel_y,
                 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                 fs_reg(brw_imm_v(0x11001100))));
  
+   /* As of gen6, we can no longer mix float and int sources.  We have
+    * to turn the integer pixel centers into floats for their actual
+    * use.
+    */
+   this->pixel_x = fs_reg(this, glsl_type::float_type);
+   this->pixel_y = fs_reg(this, glsl_type::float_type);
+   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
+   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
+
     this->current_annotation = "compute 1/pos.w";
     this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
     this->pixel_w = fs_reg(this, glsl_type::float_type);
@@ -2616,6 +2691,22 @@ fs_visitor::compute_to_mrf()
             if (scan_inst->mlen)
                break;
  
+           if (intel->gen >= 6) {
+              /* gen6 math instructions must have the destination be
+               * GRF, so no compute-to-MRF for them.
+               */
+              if (scan_inst->opcode == FS_OPCODE_RCP ||
+                  scan_inst->opcode == FS_OPCODE_RSQ ||
+                  scan_inst->opcode == FS_OPCODE_SQRT ||
+                  scan_inst->opcode == FS_OPCODE_EXP2 ||
+                  scan_inst->opcode == FS_OPCODE_LOG2 ||
+                  scan_inst->opcode == FS_OPCODE_SIN ||
+                  scan_inst->opcode == FS_OPCODE_COS ||
+                  scan_inst->opcode == FS_OPCODE_POW) {
+                 break;
+              }
+           }
+
             if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
                /* Found the creator of our MRF's source value. */
                found = true;
@@ -2902,7 +2993,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
  {
     struct brw_compile *p = &c->func;
     struct intel_context *intel = &brw->intel;
-   GLcontext *ctx = &intel->ctx;
+   struct gl_context *ctx = &intel->ctx;
     struct brw_shader *shader = NULL;
     struct gl_shader_program *prog = ctx->Shader.CurrentProgram;